	
********************************************************************************
** 	TITLE: b7_1_sag_roa_precrime.do
**
**	PROJECT: IGNITE
** 
**  PURPOSE: Saginaw ROA Cleaning for Pre-Covid Criminal Cases
********************************************************************************
		set sortseed 13

*****************************************************************Pre-Covid Crime
**Importing the Case numbers that came from Secure Server

import excel "$output_data/saginaw_case_precrim_22dec2023.xls", sheet("Sheet1") clear

rename A case_id

tempfile server

duplicates drop case_id, force
//none are dropped.

save `server', replace

**Importing the intermediate case number information that came out of ROA webscraping
	//Saginaw ROA Crosswalks

import delimited "$input_data/sag_roas/cases.csv", clear 

keep if _n < 0

tempfile scrape

//Creating an empty dataset to append to
saveold `scrape', replace 

forval t = 1(1)9{
import delimited "$input_data/sag_roas/precovcrime_cases0`t'.csv", clear 

append using `scrape'

saveold `scrape', replace
}
forval t = 10(1)13{
import delimited "$input_data/sag_roas/precovcrime_cases`t'.csv", clear 

append using `scrape'

saveold `scrape', replace
}

drop if case == "v1"

gen no_roa = (roa == "empty")

rename case case_id

drop left order

summ no_roa

drop if case_id == "A"

duplicates tag case_id,gen(how)

summ how

drop how 


saveold `scrape', replace

**Importing the case number information that came out of ROA parsing

import excel "$input_data/sag_roas/roasparsed_precrim.xlsx", sheet("Sheet1") firstrow clear 

duplicates drop case_id, force
//none are dropped.

merge 1:1 case_id using `scrape'

**Dropping the non-criminal cases that were scraped in my 0.1 percent testing sample
drop if _merge == 1 

codebook _merge no_roa

merge 1:1 case_id using `server',gen(server)


gen roa_present = (roa_case_id != "")
list case_id roa_present no_roa if roa_present == 1 & no_roa == 1

drop no_roa _merge server

*Creating diff_roa variable
replace roa_case_id = subinstr(roa_case_id, "-", "",.)
gen diff_roa = .
replace diff_roa = 1 if roa_case_id != case_id & roa_case_id != "" 
replace diff_roa = 0 if  roa_case_id == case_id & roa_case_id != "" 

 
gen c_court =  (strpos(court, "Circuit") > 0)
replace c_court = . if court == ""

gen d_court =  (strpos(court, "District") > 0)
replace d_court = . if court == ""

gen criminal_court =  (strpos(court, "Criminal") > 0)
replace criminal_court = . if court == ""


//there are also Open, INACTIVE, Bench Warrant Issued, and Suspended
gen case_closed =  (strpos(case_status, "CLOSED") > 0)
replace case_closed = . if case_status == ""


/*
cases that messed up in parsing:
*/

gen mess =  (strpos(date_filed, "Ordinance Misdemeanor Criminal") > 0 | strpos(date_filed, "Statute Misdemeanor Criminal") > 0 )
replace mess = . if date_filed == "" & date_filed == "" 

/*
Public Defender Definition source: 
https://www.notafraidtowin.com/court-appointed-attorney-vs-retained-attorney/
Retained lawyer is private while court-appointed is a public defender
*/

gen public_defense = (strpos(attorney_type, "Court Appointed") > 0)
replace public_defense = . if attorney_type == ""



/*
Disposition, cases ending in plea deals
*/

gen plea = 0
gen sentenced = 0
forval t = 1(1)4{
	replace plea = 1 if strpos(d_disp_time_event`t', "Plea") > 0
	replace sentenced = 1 if strpos(d_disp_time_event`t', "Sentence") > 0
}

replace plea = . if roa_case_id == ""

replace sentenced = . if roa_case_id == ""

//Pre-release e-monitoring
gen pre_emonitor = 0
forval t = 1(1)533{
	replace pre_emonitor = 1 if strpos(e_event_date`t', "MONITOR ON RELEASE") > 0
}

summ diff_roa case_closed c_court d_court criminal_court mess public_defense plea sentenced pre_emonitor roa_present
summ diff_roa if c_court == 1

//Cleaning event-dates and event-comments
forval t = 1(1)533{
replace e_event_date`t' = subinstr(e_event_date`t',char(34), "",.)
replace e_event_date`t' = subinstr(e_event_date`t',"[['\\n        ", "",.)
replace e_event_date`t' = subinstr(e_event_date`t',"\\n        \\n        ', ' ', '\\n      ']]", "",.)
gen e_date`t' = substr(e_event_date`t',1,10)
gen e_dated`t' = date(e_date`t', "MDY")
gen e_month`t' = mofd(e_dated`t')
gen e_event`t' = substr(e_event_date`t',11,.)
drop e_event_date`t'

replace e_event_comment`t' = subinstr(e_event_comment`t',char(34), "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', 'Comment', '\\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"['[\'\\n          \', \'\\n      \', \'Comment\', \\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    ', '\\n\\n    ']]", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    , \'\\n\\n    \']']", "",.)
}

forval t = 1(1)533{

replace e_event_comment`t' = subinstr(e_event_comment`t',char(34), "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', 'Comment', '\\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"['[\'\\n          \', \'\\n      \', \'Comment\', \\n      ", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"[['\\n          ', '\\n      ', '", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    ', '\\n\\n    ']]", "",.)
replace e_event_comment`t' = subinstr(e_event_comment`t',"\\n    , \'\\n\\n    \']']", "",.)

}



forval t = 1(1)28{
*d_disp_time_event1 
replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',char(34), "",.)
replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',"', ' ']]", "",.)

replace d_disp_time_event`t' = subinstr(d_disp_time_event`t',"[['", "",.)

replace d_disp_time_event`t' = subinstr(d_disp_time_event`t'," ', '", "",.)

gen d_disp_date`t' = substr(d_disp_time_event`t',1,10)
gen d_disp_dated`t' = date(d_disp_date`t', "MDY")
gen d_disp_month`t' = mofd(d_disp_dated`t')
gen d_disp_event`t' = substr(d_disp_time_event`t',11,.)
drop d_disp_time_event`t'

*d_disp_charge_action1
replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',char(34), "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"['[\'\\n\\n      \', \'\\n        \', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n\\n      \', \'\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n      \', \'\\n\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

replace d_disp_charge_action`t' = subinstr(d_disp_charge_action`t',"\\n\\n      \', \'\\n        \', \'\\n          \', \'Charge Number\', \'Charge Offense Description\', \'Description\', \'", "",.)

split d_disp_charge_action`t', parse(`"Query(function()"') generate(stub) limit(2)

drop stub2

replace d_disp_charge_action`t' = stub1

drop  stub1
}

order case_id roa_case_id court judge_name date_filed case_type case_status entitlement defendent attorney_name attorney_type c_* bs_* d_* e_*, first

//Saving Saginaw ROAs Pre-2020 Criminal offenses Parsed
saveold "$output_data/saginaw_roa_precrim_parsed.dta", replace



use "$output_data/saginaw_roa_precrim_parsed.dta", clear
preserve
keep if roa_present == 0 
keep case_id
export delimited using "$output_data/saginaw_case_precrim_missing_01032024.txt", replace

restore

preserve 
keep if diff_roa == 1 
keep case_id roa_case_id
export delimited using "$output_data/saginaw_case_precrim_diffroa_01032024.txt", replace

restore 